library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:dplyr':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(forcats)
library(vistime)
metadata <- read_csv("../data/metadata.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   DATE = col_character(),
##   WDW_TICKET_SEASON = col_logical(),
##   SEASON = col_character(),
##   HOLIDAYN = col_character(),
##   WDWTICKETSEASON = col_logical(),
##   WDWRaceN = col_character(),
##   WDWeventN = col_character(),
##   WDWSEASON = col_character(),
##   MKeventN = col_character(),
##   EPeventN = col_character(),
##   HSeventN = col_logical(),
##   AKeventN = col_logical(),
##   HOLIDAYJ = col_character(),
##   inSession = col_character(),
##   inSession_Enrollment = col_character(),
##   inSession_wdw = col_character(),
##   inSession_dlr = col_character(),
##   inSession_sqrt_WDW = col_character(),
##   inSession_sqrt_DLR = col_character(),
##   inSession_California = col_character()
##   # ... with 77 more columns
## )
## See spec(...) for full column specifications.
## Warning: 13258 parsing failures.
## row         col   expected   actual                   file
##   1 MKCLOSE     valid date 24:00:00 '../data/metadata.csv'
##   1 MKEMHCLOSE  valid date 27:00:00 '../data/metadata.csv'
##   1 MKCLOSEYEST valid date 26:00:00 '../data/metadata.csv'
##   1 MKCLOSETOM  valid date 24:00:00 '../data/metadata.csv'
##   1 EPCLOSEYEST valid date 25:00:00 '../data/metadata.csv'
## ... ........... .......... ........ ......................
## See problems(...) for more details.
metadata$DATE <- format(as.POSIXct(mdy(metadata$DATE) + 1, format = '%m/%d/%Y %H:%M:%S'), format='%m/%d/%Y')
wdw_metadata <- metadata %>%
  select(DATE, SEASON, HOLIDAYPX, HOLIDAYN, WDWMAXTEMP, WDWMINTEMP, WDWMEANTEMP, HOLIDAYJ, WEATHER_WDWPRECIP)
mk_metadata <- metadata %>%
  select(DATE, MKOPEN, MKCLOSE, MKEMHOPEN, MKEMHCLOSE, MKPRDDT1, MKPRDDT2, MKPRDNT1, MKPRDNT2, MKFIRET1, MKFIRET2)
ep_metadata <- metadata %>%
  select(DATE, EPOPEN, EPCLOSE, EPEMHOPEN, EPEMHCLOSE, EPFIRET1, EPFIRET2)
hs_metadata <- metadata %>%
  select(DATE, HSOPEN, HSCLOSE, HSEMHOPEN, HSEMHCLOSE, HSPRDDT1, HSFIRET1, HSFIRET2, HSSHWNT1, HSSHWNT2)
ak_metadata <- metadata %>%
  select(DATE, AKOPEN, AKCLOSE, AKEMHOPEN, AKEMHCLOSE, AKPRDDT1, AKPRDDT2, AKSHWNT1, AKSHWNT2)
wdw_metadata$HOLIDAYPX <- ifelse(wdw_metadata$HOLIDAYPX > 0, wdw_metadata$HOLIDAYPX - 1, wdw_metadata$HOLIDAYPX)
wdw_metadata$SEASON <- ifelse(wdw_metadata$SEASON == "MARTIN LUTHER KING JUNIOR DAY", "MLK DAY", wdw_metadata$SEASON)
xhot_days <- wdw_metadata %>%
  select(DATE, WDWMEANTEMP) %>%
  filter(WDWMEANTEMP >= 85) %>%
  mutate(temp_cat = "xhot_days")

hot_days <- wdw_metadata %>%
  select(DATE, WDWMEANTEMP) %>%
  filter(WDWMEANTEMP < 85 & WDWMEANTEMP >= 79.8) %>%
  mutate(temp_cat = "hot_days")

normal_days <- wdw_metadata %>%
  select(DATE, WDWMEANTEMP) %>%
  filter(WDWMEANTEMP >= 71.3 & WDWMEANTEMP < 79.8) %>%
  mutate(temp_cat = "normal_days")

cool_days <- wdw_metadata %>%
  select(DATE, WDWMEANTEMP) %>%
  filter(WDWMEANTEMP >= 62.8 & WDWMEANTEMP < 71.3) %>%
  mutate(temp_cat = "cool_days")

xcool_days <- wdw_metadata %>%
  select(DATE, WDWMEANTEMP) %>%
  filter(WDWMEANTEMP < 62.8) %>%
  mutate(temp_cat = "xcool_days")

temp_days <- bind_rows(xhot_days, hot_days, normal_days, cool_days, xcool_days)
park_colors <- c("darkgreen", "cornflowerblue", "chocolate1", "blueviolet")
temp_colors <- c("red", "orange", "yellow", "blue", "cyan")

temp_list_names <- c("xhot_days", "hot_days", "normal_days", "cool_days", "xcool_days")
days_of_week <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
dwarfs_train <- read_csv("../data/7_dwarfs_train.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
alien_saucers <- read_csv("../data/alien_saucers.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
dinosaur <- read_csv("../data/dinosaur.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
expedition_everest <- read_csv("../data/expedition_everest.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
flight_of_passage <- read_csv("../data/flight_of_passage.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
kilimanjaro_safaris <- read_csv("../data/kilimanjaro_safaris.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
navi_river <- read_csv("../data/navi_river.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
pirates_of_caribbean <- read_csv("../data/pirates_of_caribbean.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
rock_n_rollercoaster <- read_csv("../data/rock_n_rollercoaster.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
slinky_dog <- read_csv("../data/slinky_dog.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
soarin <- read_csv("../data/soarin.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
spaceship_earth <- read_csv("../data/spaceship_earth.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
splash_mountain <- read_csv("../data/splash_mountain.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
toy_story_mania <- read_csv("../data/toy_story_mania.csv")
## Parsed with column specification:
## cols(
##   date = col_character(),
##   datetime = col_datetime(format = ""),
##   SPOSTMIN = col_double(),
##   SACTMIN = col_double()
## )
rides <- list("dwarfs_train" = dwarfs_train, 
              "alien_saucers" = alien_saucers,
              "dinosaur" = dinosaur,
              "expedition_everest" = expedition_everest,
              "flight_of_passage" = flight_of_passage,
              "kilimanjaro_safaris" = kilimanjaro_safaris,
              "navi_river" = navi_river,
              "pirates_of_caribbean" = pirates_of_caribbean,
              "rock_n_rollercoaster" = rock_n_rollercoaster,
              "slinky_dog" = slinky_dog,
              "soarin" = soarin,
              "spaceship_earth" = spaceship_earth,
              "splash_mountain" = splash_mountain,
              "toy_story_mania" = toy_story_mania)
for (i in 1:14) {
  rides[[i]] <- rides[[i]] %>%
      mutate(time = format(ymd_hms(datetime), "%H:%M:%S"))
  rides[[i]] <- rides[[i]] %>%
    mutate(ride_name = as.factor(names(rides[i])))
    rides[[i]] <- rides[[i]] %>%
    filter(SPOSTMIN != -999 | is.na(SPOSTMIN))
}
ride_name <- c("dwarfs_train", "alien_saucers", "dinosaur", "expedition_everest", "flight_of_passage", 
               "kilimanjaro_safaris", "navi_river", "pirates_of_caribbean", "rock_n_rollercoaster", "slinky_dog", 
               "soarin", "spaceship_earth", "splash_mountain", "toy_story_mania")
open_date <- as.POSIXct(c("2014/05/28", "2018/06/30", "1998/04/22", "2006/04/09", "2017/05/27", 
                       "1998/04/22", "2017/05/27", "1973/12/17", "1999/07/29", "2018/06/30", 
                       "2005/05/15", "1982/10/01", "1992/07/17", "2008/05/31"))
splash <- c(FALSE, FALSE, FALSE, FALSE, TRUE,
            FALSE, FALSE, TRUE,  FALSE, FALSE,
            FALSE, FALSE, TRUE,  FALSE)
indoor <- c(FALSE, FALSE, TRUE,  FALSE, TRUE,
            FALSE, TRUE,  TRUE,  TRUE,  FALSE,
            TRUE,  TRUE,  FALSE, TRUE)
age_hierarchy <- c(10, 13,  4,  8, 11,
                    5, 12,  1,  6, 14,
                    7,  2,  3,  9)
park <- c("mk", "hs", "ak", "ak", "ak", 
          "ak", "ak", "mk", "hs", "hs", 
          "ep", "ep", "mk", "hs")
ride_metadata <- data.frame(ride_name, open_date, age_hierarchy, splash, indoor, park)
rides_df <- rides[[1]]
for (i in 2:14) {
  rides_df <- rbind(rides_df, rides[[i]])
}
temps_df <- rides_df %>%
  inner_join(temp_days, by = c("date" = "DATE")) %>%
  group_by(ride_name, temp_cat) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE))
rides_df %>%
  filter(datetime > as.POSIXct("2018-06-30")) %>%
  ggplot(aes(x = SPOSTMIN, y = ride_name)) +
  geom_boxplot(na.rm = TRUE, outlier.shape = "circle", outlier.alpha = .1, size = 1) + 
  coord_cartesian(xlim = c(0,300)) +
  geom_vline(xintercept = c(30, 60, 90, 120), color = c("green", "yellow", "orange", "red"), size = 1) +
  labs(title = "Boxplot of mean estimated wait time by ride") +
  xlab("Wait time in minutes") +
  ylab("Ride name")

rides_df %>% group_by(ride_name) %>%
  filter(datetime > as.POSIXct("2018-06-30")) %>%
  ggplot(aes(x = SACTMIN, y = ride_name)) +
  geom_boxplot(na.rm = TRUE, outlier.shape = "circle", outlier.alpha = .1, size = 1) +
  coord_cartesian(xlim = c(0, 300)) +
  geom_vline(xintercept = c(30, 60, 90, 120), color = c("green", "yellow", "orange", "red"), size = 1) +
  labs(title = "Boxplot of mean user wait time by ride") +
  xlab("Wait time in minutes") +
  ylab("Ride name")

park_averages <- rides_df %>%
  inner_join(ride_metadata) %>%
  group_by(park) %>%
  summarise(park_averages = mean(SPOSTMIN, na.rm = TRUE))
## Joining, by = "ride_name"
## Warning: Column `ride_name` joining factors with different levels, coercing to
## character vector
rides_df %>%
  inner_join(ride_metadata) %>%
  mutate(weekday = weekdays(datetime)) %>%
  group_by(ride_name, park) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE)) %>%
  ggplot() +
  geom_col(aes(x = park, y = mean_wait, fill = ride_name), position = "dodge") +
  labs(title = "Column chart of mean estimated wait time by ride, grouped by park", fill = "Ride name") +
  xlab("Park (Animal Kingdom, Epcot, Hollywood Studios, Magic Kingdom)") +
  ylab("Mean wait (minutes)")
## Joining, by = "ride_name"
## Warning: Column `ride_name` joining factors with different levels, coercing to
## character vector

rides_df %>%
  mutate(weekday = weekdays(datetime)) %>%
  group_by(weekday, ride_name) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE)) %>%
  ggplot() +
  geom_col(aes(x = ordered(weekday, levels = days_of_week), y = mean_wait)) +
  labs(title = "Mean estimated wait time by day for each ride") +
  xlab("Day of the week") +
  ylab("Mean wait (minutes)") +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_wrap(~ ride_name)

rides_df %>%
  mutate(weekday = weekdays(datetime)) %>%
  group_by(weekday, ride_name) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE)) %>%
  ggplot(aes(x = ordered(weekday, levels = days_of_week), y = mean_wait, fill = ride_name)) +
  geom_col() +
  labs(title = "Mean combined estimated wait time by day of week") +
  xlab("Day of the week") +
  ylab("Mean wait (minutes)")

rides_df %>%
  inner_join(ride_metadata) %>%
  mutate(weekday = weekdays(datetime)) %>%
  group_by(weekday, park) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE)) %>%
  ggplot() +
  geom_col(aes(x = ordered(weekday, levels = days_of_week), y = mean_wait, fill = park), position = "fill") +
  labs(title = "Mean estimated wait time by park as a portion of total wait time by day", fill = "Park") +
  xlab("Day of the week") +
  ylab("Portion of total wait time")
## Joining, by = "ride_name"
## Warning: Column `ride_name` joining factors with different levels, coercing to
## character vector

wdw_metadata_2019 <- wdw_metadata %>%
  filter(year(as.Date(mdy(DATE))) == 2019)

seasons <- c("WINTER", "MLK DAY", "PRESIDENTS WEEK", "SPRING", "MARDI GRAS", "EASTER", "MEMORIAL DAY", "SUMMER BREAK",
             "JULY 4TH", "SEPTEMBER LOW", "FALL", "COLUMBUS DAY", "HALLOWEEN", "JERSEY WEEK", "THANKSGIVING", "CHRISTMAS", "CHRISTMAS PEAK")
wdw_metadata_2019 %>%
  ggplot() +
  geom_bar(aes(x = HOLIDAYPX)) +
  labs(title = "Histogram of proximity of a day at the park to a holiday in 2019") +
  xlab("Proximity (days") +
  ylab("Count")

wdw_metadata_2019 %>%
  group_by(SEASON) %>%
  ggplot() +
  geom_bar(aes(y = ordered(SEASON, levels = rev(seasons)), fill = as.factor(HOLIDAYPX))) +
  labs(title = "Proximity to a holiday in any given portion of a ticket season", fill = "\nDays from a holiday") +
  xlab("Total days in a season") +
  ylab("Seasons")

wdw_metadata %>%
  inner_join(rides_df, by = c("DATE" = "date")) %>%
  group_by(HOLIDAYPX) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE)) %>%
  ggplot() +
  geom_col(aes(x = HOLIDAYPX, y = mean_wait)) +
  labs(title = "Mean estimated wait time by proximity to a holiday") +
  xlab("Proximity (days)") +
  ylab("Wait time (minutes)")

rides_df %>%
  mutate(year = year(as.POSIXct(mdy(date) + 1)), month = month(as.POSIXct(mdy(date) + 1)), day = weekdays(as.POSIXct(mdy(date) + 1)),
         monthweek = ceiling(day(mdy(date)) / 7)) %>%
  group_by(month, day, monthweek) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE)) %>% 
  ggplot(aes(x = monthweek, y = ordered(day, levels = rev(days_of_week)), fill = mean_wait)) +
  geom_tile(color = "grey") +
  facet_wrap(~month) +
  scale_fill_distiller(palette = "Spectral") +
  labs(fill = "Wait (min)", title = "Mean wait time by day of the year for all rides", x = "Week of the month", y = "")

rides_df %>%
  mutate(year = year(as.POSIXct(mdy(date) + 1)), month = month(as.POSIXct(mdy(date) + 1)), day = wday(as.POSIXct(mdy(date) + 1)),
         monthweek = ceiling(day(mdy(date)) / 7)) %>%
  group_by(month, day, monthweek) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE)) %>% 
  ggplot(aes(x = monthweek, y = ordered(day, levels = days_of_week), fill = mean_wait)) +
  geom_tile(color = "grey") +
  facet_grid(~month) +
  scale_fill_distiller(palette = "Spectral") +
  labs(fill = "Wait (min)", title = "Mean wait time by week of the year for all rides", x = "Week of the month", y = "")

temp_illustration <- data.frame(bucket = c("x < 62.8", "62.8 <= x < 71.3", "71.3 <= x < 79.8", "79.8 >= x > 85", "x >= 85"),
                                name = c("xcool_days", "cool_days", "normal_days", "hot_days", "xhot_days"),
                                start = c("32-01-01", "62-09-18", "71-03-18", "79-09-18", "85-01-01"),
                                end = c("62-09-18", "71-03-18", "79-09-18", "85-01-01", "100-01-01"),
                                color = c("cyan", "blue", "yellow", "orange", "red"))

vistime(temp_illustration, groups = "name", events = "bucket", title = "Temperature buckets")
temps_df %>%
  group_by(ride_name) %>%
  ggplot() +
  geom_col(aes(x = ride_name, y = mean_wait, fill = fct_relevel(temp_cat, temp_list_names)), position = "dodge") +
  scale_fill_manual(values = temp_colors) +
  coord_flip() +
  labs(fill = "Temp cat") +
  theme(legend.justification=c(1,1), legend.position=c(1,1)) +
  labs(title = "Mean estimated wait time for each ride at a temperature", fill = "Temperature") +
  ylab("Mean wait (minutes)") +
  xlab("Ride")

temps_df %>%
  inner_join(ride_metadata) %>%
  filter(splash == TRUE) %>%
  group_by(ride_name) %>%
  ggplot() +
  geom_col(aes(x = ride_name, y = mean_wait, fill = fct_relevel(temp_cat, temp_list_names)), position = "dodge") +
  scale_fill_manual(values = temp_colors) +
  coord_flip() +
  labs(fill = "Temp cat") +
  theme(legend.justification=c(1,1), legend.position=c(1,1)) +
  labs(title = "Mean estimated wait for rides with a splash feature at a temperature", fill = "Temperature") +
  ylab("Mean wait (minutes)") +
  xlab("Ride")
## Joining, by = "ride_name"
## Warning: Column `ride_name` joining factors with different levels, coercing to
## character vector

temps_df %>%
  inner_join(ride_metadata) %>%
  filter(indoor == TRUE) %>%
  group_by(ride_name) %>%
  ggplot() +
  geom_col(aes(x = ride_name, y = mean_wait, fill = fct_relevel(temp_cat, temp_list_names)), position = "dodge") +
  scale_fill_manual(values = temp_colors) +
  coord_flip() +
  labs(fill = "Temp cat") +
  theme(legend.justification=c(1,1), legend.position=c(1,1)) +
  labs(title = "Mean estimated wait time for indoor rides at a temperature", fill = "Temperature") +
  ylab("Mean wait (minutes)") +
  xlab("Ride")
## Joining, by = "ride_name"
## Warning: Column `ride_name` joining factors with different levels, coercing to
## character vector

temps_df %>%
  inner_join(ride_metadata) %>%
  filter(splash == TRUE & indoor == TRUE) %>%
  group_by(ride_name) %>%
  ggplot() +
  geom_col(aes(x = ride_name, y = mean_wait, fill = fct_relevel(temp_cat, temp_list_names)), position = "dodge") +
  scale_fill_manual(values = temp_colors) +
  coord_flip() +
  labs(fill = "Temp cat") +
  theme(legend.justification=c(1,1), legend.position=c(1,1)) +
  labs(title = "Mean estimated wait time for indoor + splash rides at a temperature", fill = "Temperature") +
  ylab("Mean wait (minutes)") +
  xlab("Ride")
## Joining, by = "ride_name"
## Warning: Column `ride_name` joining factors with different levels, coercing to
## character vector

rides_df %>%
  inner_join(ride_metadata) %>%
  filter(park == "ak") %>%
  inner_join(ak_metadata, by = c("date" = "DATE")) %>%
  mutate(time = format(round_date(ymd_hms(datetime), "30 minutes"), "%H:%M")) %>%
  filter(hm(time) > AKOPEN) %>%
  group_by(time, ride_name) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE)) %>%
  ggplot() +
  geom_col(aes(x = time, y = mean_wait), width = 1) +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_grid(rows = vars(ride_name)) +
  labs(title = "Mean estimated wait time by time of day") +
  xlab("Time") +
  ylab("Wait time (minutes)")
## Joining, by = "ride_name"
## Warning: Column `ride_name` joining factors with different levels, coercing to
## character vector

rides_df %>%
  inner_join(ride_metadata) %>%
  filter(park == "ep") %>%
  inner_join(ep_metadata, by = c("date" = "DATE")) %>%
  mutate(time = format(round_date(ymd_hms(datetime), "30 minutes"), "%H:%M")) %>%
  filter(hm(time) > EPOPEN) %>%
  group_by(time, ride_name) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE)) %>%
  ggplot() +
  geom_col(aes(x = time, y = mean_wait), width = 1) +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_grid(rows = vars(ride_name)) +
  labs(title = "Mean estimated wait time by time of day") +
  xlab("Time") +
  ylab("Wait time (minutes)")
## Joining, by = "ride_name"
## Warning: Column `ride_name` joining factors with different levels, coercing to
## character vector

rides_df %>%
  inner_join(ride_metadata) %>%
  filter(park == "hs") %>%
  inner_join(hs_metadata, by = c("date" = "DATE")) %>%
  mutate(time = format(round_date(ymd_hms(datetime), "30 minutes"), "%H:%M")) %>%
  filter(hm(time) > HSOPEN) %>%
  group_by(time, ride_name) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE)) %>%
  ggplot() +
  geom_col(aes(x = time, y = mean_wait), width = 1) +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_grid(rows = vars(ride_name)) +
  labs(title = "Mean estimated wait time by time of day") +
  xlab("Time") +
  ylab("Wait time (minutes)")
## Joining, by = "ride_name"
## Warning: Column `ride_name` joining factors with different levels, coercing to
## character vector

rides_df %>%
  filter(year(mdy(date)) == 2019) %>%
  inner_join(ride_metadata) %>%
  filter(park == "mk") %>%
  inner_join(mk_metadata, by = c("date" = "DATE")) %>%
  mutate(time = format(round_date(ymd_hms(datetime), "30 minutes"), "%H:%M")) %>%
  filter(hm(time) > MKOPEN) %>%
  group_by(time, ride_name, MKPRDDT1) %>%
  summarise(mean_wait = mean(SPOSTMIN, na.rm = TRUE)) %>%
  ggplot() +
  geom_col(aes(x = time, y = mean_wait), width = 1) +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_grid(rows = vars(ride_name)) +
  labs(title = "Mean estimated wait time by time of day") +
  xlab("Time") +
  ylab("Wait time (minutes)")
## Joining, by = "ride_name"
## Warning: Column `ride_name` joining factors with different levels, coercing to
## character vector

wdw_metadata %>%
  mutate(year = year(as.POSIXct(mdy(DATE) + 1)), month = month(as.POSIXct(mdy(DATE) + 1)), day = weekdays(as.POSIXct(mdy(DATE) + 1)),
         monthweek = ceiling(day(mdy(DATE)) / 7)) %>%
  group_by(month, day, monthweek) %>%
  summarise(mean_rain = mean(WEATHER_WDWPRECIP, na.rm = TRUE)) %>% 
  ggplot(aes(x = monthweek, y = day, fill = mean_rain)) +
  geom_tile(color = "grey") +
  facet_grid(~month) +
  scale_fill_gradient2(low = "cyan", mid = "deepskyblue4", high = "navy", midpoint = .2) +
  labs(fill = "Rain (in)", title = "Mean historical rainfall by day", x = "Week of the month", y = "")